rm(list = ls())
options(scipen = 999)

# Load the necessary libraries
library(Synth)
library(dplyr)
library(gsynth)
library(writexl)
library(ggplot2)

data <- read.csv("base_4dic.csv", row.names = 1) #GDP Per Capita Constant 2015 - WORLD BANK
data <- data %>% subset(year <= 2009)

data$country_name <- as.character(data$country_name)
data$country_code <- as.character(data$country_code)

country_left = c('China', 'Philippines', 'Canada', 'United States', 'South Africa','Australia') # BENCHMARK

treated_country = 'Chile'
intervention_year = 1990

# Identify the pre-treatment and post-treatment periods
pre_treatment_period <- c(1975:(intervention_year-1))   # Replace with the years of your pre-treatment period
post_treatment_period <- c(intervention_year: 2009)  # Replace with the years of your post-treatment period

# Create the outcome and covariates matrices
outcome <- data[data$country_name == treated_country, "gdp_percapita"]

covariate_names <- c(names(data)[5:16]) #[5:16]

# Chequeando variables excluidas
names(data)[!names(data) %in% covariate_names]

covariates <- data %>% subset(country_name != treated_country) %>% select(all_of(covariate_names))

data$country_index <- as.factor(data$country_code)
data$country_index <- as.numeric(data$country_index)

# Guarda los resultados

library(car)
model <- lm(gdp_percapita ~ ., data=data %>% select(c(all_of(c('gdp_percapita', covariate_names)))))
vif_values <- vif(model)
print(vif_values)

treated_country_index = unique(data$country_index[which(data$country_name == treated_country)]) #dejar afuera tratado
left_country_index = unique(data$country_index[which(data$country_name == country_left)]) #dejar afuera leave one out
if(length(treated_country_index) > 1){treated_country_index = treated_country_index[1]}


# CV  ------------------------------------------------------------
library(MSCMT)
data <- data %>% subset(!country_name %in% c(country_left, "Congo, Dem Rep") )
data = data %>% mutate(country_index = as.numeric(as.factor(country_name)))

data_long <- listFromLong(data, unit.variable="country_index", time.variable="year", unit.names.variable="country_name")
head(data_long)

treatment.identifier <- "Chile"
controls.identifier  <- setdiff(colnames(data_long[[1]]),
                                c(treatment.identifier, "Congo, Dem Rep"))

# Períodos de entrenamiento y validación para la variable dependiente
times.dep <- cbind("gdp_percapita" = c(1975, 1989))

# Períodos de entrenamiento y validación para los predictores
times.pred <- cbind("pop_growth"                            = c(1975, 1989),
                    "kg"                                      = c(1975, 1989),
                    "ki"                                      = c(1975, 1989),
                    "openk"                                   = c(1975, 1989),
                    "polity2"                                 = c(1975, 1989),
                    "durable"                                 = c(1975, 1989),
                    "xconst"                                  = c(1975, 1989),
                    "life_exp"                                = c(1975, 1989),
                    "fert_rate"                               = c(1975, 1989),
                    "Birth.rate..crude..per.1.000.people."    = c(1975, 1989),
                    "AV..Years.of.total.schooling.AG.15_999"  = c(1975, 1989),
                    "AV..Years.of.primary.schooling.AG.15_999"= c(1975, 1989))

agg.fns <- rep("mean", ncol(times.pred))


############################################################################################################
# Definiendo periodos CV
year_fin_training_cv_1 = 1984
year_fin_training_cv_2 = 1980

periodo_training_1 = c(1975,year_fin_training_cv_1)
periodo_validation_1 = c(periodo_training_1[2] + 1, 1989)
periodo_training_2 = c(1975,year_fin_training_cv_2)
periodo_validation_2 = c(periodo_training_2[2] + 1, 1989)


############################################################################################################
### Definiendo CV 1
# Periodo de entrenamiento para las variables predictoras
times.pred.training_1 <- cbind("pop_growth"                            = periodo_training_1,
                               "kg"                                      = periodo_training_1,
                               "ki"                                      = periodo_training_1,
                               "openk"                                   = periodo_training_1,
                               "polity2"                                 = periodo_training_1,
                               "durable"                                 = periodo_training_1,
                               "xconst"                                  = periodo_training_1,
                               "life_exp"                                = periodo_training_1,
                               "fert_rate"                               = periodo_training_1,
                               "Birth.rate..crude..per.1.000.people."    = periodo_training_1,
                               "AV..Years.of.total.schooling.AG.15_999"  = periodo_training_1,
                               "AV..Years.of.primary.schooling.AG.15_999"= periodo_training_1)

# Periodo de validación para la variable dependiente
times.dep.validation_1 <- cbind("gdp_percapita" = periodo_validation_1)
############################################################################################################
### Definiendo CV 2
# Periodo de entrenamiento para las variables predictoras
times.pred.training_2 <- cbind("pop_growth"                              = periodo_training_2,
                               "kg"                                      = periodo_training_2,
                               "ki"                                      = periodo_training_2,
                               "openk"                                   = periodo_training_2,
                               "polity2"                                 = periodo_training_2,
                               "durable"                                 = periodo_training_2,
                               "xconst"                                  = periodo_training_2,
                               "life_exp"                                = periodo_training_2,
                               "fert_rate"                               = periodo_training_2,
                               "Birth.rate..crude..per.1.000.people."    = periodo_training_2,
                               "AV..Years.of.total.schooling.AG.15_999"  = periodo_training_2,
                               "AV..Years.of.primary.schooling.AG.15_999"= periodo_training_2)

# Periodo de validación para la variable dependiente
times.dep.validation_2 <- cbind("gdp_percapita" = periodo_validation_2)


############################################################################################################
###### Entrenando Primera CV
# Uso de la función mscmt con validación cruzada
res_cv_1 <- mscmt(
  data = data_long,
  treatment.identifier = treatment.identifier,
  controls.identifier = controls.identifier,
  times.dep = times.dep,
  times.pred = times.pred,
  agg.fns = agg.fns,
  seed = 42,
  verbose = TRUE,
  debug = T,
  times.pred.training = times.pred.training_1,
  times.dep.validation = times.dep.validation_1
)

###### Entrenando Segunda CV
# Uso de la función mscmt con validación cruzada
res_cv_2 <- mscmt(
  data = data_long,
  treatment.identifier = treatment.identifier,
  controls.identifier = controls.identifier,
  times.dep = times.dep,
  times.pred = times.pred,
  agg.fns = agg.fns,
  seed = 42,
  verbose = TRUE,
  debug = T,
  times.pred.training = times.pred.training_2,
  times.dep.validation = times.dep.validation_2
)


ggplot(res_cv_1[[1]], type="comparison") # CV 1
ggplot(res_cv_2[[1]], type="comparison") # CV 2
  
